
// Start from an empty Stata session before building the block assignment datasets
clear all

/*

This file combines and appends block assignment (geography) information and block relationship files (changes over time)

Note that for pre-2010 block assignments, we must merge the 2010 block identifiers onto the block assignment file (since 2000 IDs are used in the BAF and 2010 IDs are used in the spending and covariate datasets)

1. First download block assignment files (BAF)

 2000 Accessed here:
 https://www2.census.gov/census_2000/datasets/redistricting_file--pl_94-171/
 
 2010 Accessed here:  
 https://www2.census.gov/census_2000/datasets/State_Legislative_Districts/SLD_Block_Supplement/
 
 2012 Accessed here: 
 https://www.census.gov/geographies/mapping-files/2012/dec/rdo/2012-state-legislative-bef.html

2. Save to 3 folders in $RawDataPath/, respectively: census_sldb_post2000, census_sldb_pre2010, census_sldb_post2010

3. Download 2000 to 2010 block crosswalk and save as $RawDataPath/blockrel_00to10

This file also uses the data dictionary 1_Data/input/sldb.dct
*/



// ---------------------------------------------------------------------------
// Post-2000 BAF (merges with 2004 elections and 2007 spending)
//
// For each state: unzip the redistricting geography file, read it with the
// fixed-width dictionary sldb.dct, keep the block identifiers and district
// codes, and stack the states into one dataset.
// Output: $OutputPath/censusblock_assign_post2000.dta
// ---------------------------------------------------------------------------

cd "$RawDataPath/census_sldb_post2000"

// empty accumulator that every state's file is appended to
clear all
tempfile base
save `base', replace emptyok

// nc, tx, mn and la are excluded from the loop; their assignments are appended
// further down from "$InputPath/District Maps"
local st al ak az ar ca co ct de fl ga hi id il in ia ks ky me md ma mi ms mo mt ne nv nh nj nm ny nd oh ok or pa ri sc sd tn ut vt va wa wv wi wy

foreach i in `st' {

	unzipfile "`i'geo.upl.zip", replace

	// give the extracted file a .txt extension before reading it into Stata.
	// copy + erase are Stata built-ins: unlike a shell "mv" they work on every
	// platform and stop the do-file if the extracted file is missing.
	copy "$RawDataPath/census_sldb_post2000/`i'geo.upl" "$RawDataPath/census_sldb_post2000/`i'geo.txt", replace
	erase "$RawDataPath/census_sldb_post2000/`i'geo.upl"

	// import using dictionary for fixed file census block assignment data
	infile using "$InputPath/sldb.dct", using("$RawDataPath/census_sldb_post2000/`i'geo.txt") clear

	keep fstate fcounty tract block sldu sldl vtd vtdi state

	append using `base'
	save `base', replace

	// the extracted text file is only needed for the import
	erase "$RawDataPath/census_sldb_post2000/`i'geo.txt"
}

// the last iteration leaves the full stacked dataset in memory
destring fcounty tract block, replace

// rows without a block number are not block-level records
drop if block==.
rename (tract block) (ftract fblock)

gen censusyear=2000
gen year=2007

// states handled outside the loop
// NOTE(review): censusyear is generated before these appends, so it is only
// filled for these four states if their files already carry it - confirm.
append using "$InputPath/District Maps/NC_block_assign_07"
append using "$InputPath/District Maps/TX_block_assign_07"
append using "$InputPath/District Maps/MN_block_assign_07"
append using "$InputPath/District Maps/LA_block_assign_07"

// set year on every row, including the appended states
replace year=2007

cd "$OutputPath"
save censusblock_assign_post2000, replace

// ---------------------------------------------------------------------------
// Pre-2010 BAF (done in 2006, merges with 2010 elections and 2012 spending)
//
// Note that the block assignment file uses 2000 block IDs, but it will need to
// merge with other datasets that use 2010 block IDs. Each state is therefore
// passed through the 2000 -> 2010 block relationship file and collapsed to the
// 2010 block x district level, once for sldl and once for sldu.
// Output: $OutputPath/censusblock_assign_pre2010.dta
//
// (The section header uses "//": a leading "///" is Stata's line-join
// indicator and would splice the following line onto the comment.)
// ---------------------------------------------------------------------------

// empty accumulator that every state's file is appended to
clear all
tempfile base
save `base', replace emptyok

// nc, tx, and mn are excluded from the loop; they are appended after it
local st al ak az ar ca co ct de fl ga hi id il in ia la ks ky me md ma mi ms mo mt ne nv nh nj nm ny nd oh ok or pa ri sc sd tn ut vt va wa wv wi wy

foreach i in `st' {
	cd "$RawDataPath/census_sldb_pre2010"
	unzipfile "`i'geo_slb", replace

	// give the extracted file a .txt extension before reading it into Stata.
	// copy + erase are Stata built-ins: unlike a shell "mv" they work on every
	// platform and stop the do-file if the extracted file is missing.
	copy "$RawDataPath/census_sldb_pre2010/`i'geo_slb" "$RawDataPath/census_sldb_pre2010/`i'geo_slb.txt", replace
	erase "$RawDataPath/census_sldb_pre2010/`i'geo_slb"

	// same dictionary (and same location) as the post-2000 section
	infile using "$InputPath/sldb.dct", using("`i'geo_slb.txt") clear

	// statastates is a user-written command; it is used here to attach
	// state_fips by state abbreviation and leaves a merge indicator behind
	statastates, abb(state)
	keep if _m==3
	drop _m

	keep fstate fcounty tract block sldu sldl vtd vtdi state state_fips

	destring fcounty tract block, replace

	tempfile temp
	save `temp', replace

	// two-digit, zero-padded state FIPS code used in the relationship file name
	// (each file holds a single state, so the first observation is enough)
	local s = string(state_fips[1], "%02.0f")

	// import 2010 block identifiers
	cd "$RawDataPath/blockrel_00to10"
	unzipfile "TAB2000_TAB2010_ST_`s'_v2.zip", replace
	import delim "TAB2000_TAB2010_ST_`s'_v2.txt", clear

	rename (state_2000 county_2000 tract_2000 blk_2000) (fstate fcounty tract block)

	// merge with pre2010 block assignment (many relationship rows per 2000 block)
	merge m:1 fstate fcounty tract block using `temp'
	keep if _merge==3

	// flags problems: two 2000 blocks merge into 1 2010 block (creates potential
	// problem, which district to keep track of?). Compared case-insensitively in
	// case the file codes the part flag as "P".
	gen flag=(lower(block_part_flag_r)=="p")

	// percent of 2010 block in the 2000 block
	gen pinblock=arealand_int/arealand_2010

	save `temp', replace

	// ---- collapse to sldl-2010 block level, to merge with 2010 data ----
	collapse (sum) pinblock (mean) flag fstate fcounty tract block (firstnm) vtd, by(state_2010 county_2010 tract_2010 blk_2010 sldl)

	rename pinblock pinblock_sldl

	// a 2010 block is a problem when it appears under more than one district.
	// Rows are unique per (2010 block, sldl) after the collapse, so every row
	// after the first within a block is a different district. Grouping on the
	// full block key keeps equal block numbers in neighbouring tracts or
	// counties from being counted.
	bysort state_2010 county_2010 tract_2010 blk_2010 (sldl): gen problem=(_n>1)
	sum problem
	gen pproblemblock=r(mean)

	// drop the 2000 values and let the 2010 identifiers take the standard names
	drop fstate fcounty tract block flag problem
	rename (state_2010 county_2010 tract_2010 blk_2010) (fstate fcounty tract block)

	tempfile sldl
	save `sldl', replace

	// ---- repeat for sldu ----
	use `temp', clear
	collapse (sum) pinblock (mean) flag fstate fcounty tract block (firstnm) vtd, by(state_2010 county_2010 tract_2010 blk_2010 sldu)

	rename pinblock pinblock_sldu

	bysort state_2010 county_2010 tract_2010 blk_2010 (sldu): gen problem=(_n>1)
	sum problem
	gen pproblemblock=r(mean)

	// drop the 2000 values and let the 2010 identifiers take the standard names
	drop fstate fcounty tract block flag problem
	rename (state_2010 county_2010 tract_2010 blk_2010) (fstate fcounty tract block)

	// NOTE(review): merge m:m pairs rows by their order within each block, so a
	// block split across several sldu AND several sldl districts gets an
	// order-dependent pairing, and pproblemblock keeps the sldu (master) value.
	// Left unchanged so the output keeps its current row structure - confirm
	// this is what downstream code expects.
	merge m:m fstate fcounty tract block using `sldl'

	erase "TAB2000_TAB2010_ST_`s'_v2.txt"

	append using `base'
	save `base', replace

	// the extracted text file is only needed for the import
	erase "$RawDataPath/census_sldb_pre2010/`i'geo_slb.txt"
}

// the last iteration leaves the full stacked dataset in memory
drop if block==.
rename (tract block) (ftract fblock)

gen censusyear=2010
gen year=2012

// states handled outside the loop
append using "$InputPath/District Maps/NC_block_assign_12"
append using "$InputPath/District Maps/TX_block_assign_12"
append using "$InputPath/District Maps/MN_block_assign_12"

// set year on every row, including the appended states
replace year=2012

cd "$OutputPath/"
save censusblock_assign_pre2010, replace


// ---------------------------------------------------------------------------
// Post-2010 census (merge with 2014 election and 2017 and after)
//
// Builds one row per 2010 block with its lower (sldl) and upper (sldu) chamber
// district. States whose maps were updated in 2014 are taken from the 2014
// files; all other states come from the original national files.
// Output: $OutputPath/censusblock_assign_post2010.dta
//
// (The section header uses "//": a leading "///" is Stata's line-join
// indicator and would splice the following line onto the comment.)
// ---------------------------------------------------------------------------

// state FIPS codes of AK KY ME MN MT PA TX and UT, which updated maps in 2014
local upd14 2, 21, 23, 27, 30, 42, 48, 49

// ---- lower chamber (sldl) ----

cd "$RawDataPath/census_sldb_post2010/"
//unzipfile "sldl_post2010.zip", replace
import delim "National_SLDL.txt", clear stringcols(_all)

// use 15 digit GEO identifier to generate fips codes
gen fstate=substr(blockid,1,2)
gen fcounty=substr(blockid,3,3)
gen tract=substr(blockid,6,6)
gen block=substr(blockid,12,4)

rename (district name) (sldl sldlname)
destring fstate fcounty tract block, replace
drop if fstate==72 | fstate==11 // DC and PR

// these states are replaced by their 2014 maps below
drop if inlist(fstate, `upd14')

tempfile sldl
save `sldl', replace

	// 2014 maps for the updated states
	import delim "$RawDataPath/census_sldb_post2010/National_2014SLDL.txt", clear stringcols(_all)

	gen fstate=substr(blockid,1,2)
	destring fstate, replace
	keep if inlist(fstate, `upd14')

	gen fcounty=substr(blockid,3,3)
	gen tract=substr(blockid,6,6)
	gen block=substr(blockid,12,4)

	rename (district) (sldl)
	// if the 2014 file carries a name column, store it under the same variable
	// as the national file so the district names line up after the append
	capture confirm variable name, exact
	if !_rc rename name sldlname
	destring fcounty tract block, replace

append using `sldl'
save `sldl', replace

// ---- upper chamber (sldu) ----

cd "$RawDataPath/census_sldb_post2010/"
//unzipfile "sldu_post2010.zip", replace
import delim "National_SLDU.txt", clear stringcols(_all)

// use 15 digit GEO identifier to generate fips codes
gen fstate=substr(blockid,1,2)
gen fcounty=substr(blockid,3,3)
gen tract=substr(blockid,6,6)
gen block=substr(blockid,12,4)

rename (district name) (sldu slduname)
destring fstate fcounty tract block, replace
drop if fstate==72 | fstate==11 // DC and PR

keep fstate fcounty tract block sldu slduname

// these states are replaced by their 2014 maps below
drop if inlist(fstate, `upd14')

tempfile sldu
save `sldu', replace

	// 2014 maps for the updated states
	import delim "$RawDataPath/census_sldb_post2010/National_2014SLDU.txt", clear stringcols(_all)

	gen fstate=substr(blockid,1,2)
	destring fstate, replace
	keep if inlist(fstate, `upd14')

	gen fcounty=substr(blockid,3,3)
	gen tract=substr(blockid,6,6)
	gen block=substr(blockid,12,4)

	rename (district) (sldu)
	// if the 2014 file carries a name column, store it under the same variable
	// as the national file so the district names line up after the append
	capture confirm variable name, exact
	if !_rc rename name slduname
	destring fcounty tract block, replace

append using `sldu'

// ---- combine chambers: one row per block; unmatched rows from either side are kept ----
merge 1:1 fstate fcounty tract block using `sldl'
drop _merge

drop if block==.
rename (tract block) (ftract fblock)

// the component fips variables replace the 15 digit identifier
drop blockid

gen censusyear=2010

gen year=2017 //to be merged with 2017 spending

cd "$OutputPath"
save censusblock_assign_post2010, replace






